library(dplyr)
library(readr)
library(lsa)
library(tidyr)
library(ggplot2)
library(promptr)
library(stringr)
library(stargazer)

# Fix the random-number seed so repeated runs start from the same RNG state
set.seed(1996)
#############################
# Clear the workspace, keeping only 'script', 'scripts' and 'log_file'
# (presumably set by a calling runner script -- TODO confirm).
# objects_to_keep is itself not on the keep-list, so this call removes it too.
objects_to_keep <- c('script', 'scripts', 'log_file')
rm(list = setdiff(ls(), objects_to_keep))
#############################
# Load the candidate-level master data used by every section below
master <- readRDS('Master_web.rds')

#############################
# Length of Statement and Extremity -- page 14
#############################
# Statement-level issue text
issue_statements <- readRDS('issue_statements2018_2022.rds')
# Words per statement, counted as runs of word characters
issue_statements$text_length <- str_count(issue_statements$issue_text, '\\w+')

# Total words and number of statements per candidate / district / state / year.
# ungroup() so no leftover grouping is carried into the merge below.
text_length <- issue_statements %>%
  group_by(candidate, cd, state_postal, year) %>%
  summarise(words = sum(text_length, na.rm = TRUE),
            statements = n()) %>%
  ungroup()

# Inner join: keeps only candidate-years present in both tables
text_length <- merge(text_length, master,
                     by = c('candidate', 'state_postal', 'cd', 'year'))

# Flip the sign of Democrats' scores (presumably so that larger values mean
# "more extreme" in both parties -- assumes Democrats sit on the negative side
# of the WEB Score scale; confirm)
text_length$extremity <- ifelse(text_length$cand_party == 'Democrat',
                                text_length$web_score * -1,
                                text_length$web_score)

# Correlate extremity with amount of text. web_score can be missing in master,
# and a single NA would otherwise turn the whole correlation into NA, so use
# the same pairwise-complete handling as the hand-label correlations below.
cor(text_length$extremity, text_length$words,
    use = 'pairwise.complete.obs')
cor(text_length$extremity, text_length$statements,
    use = 'pairwise.complete.obs')
rm(text_length)

#############################
# Table B1: Embeddings Robustness
#############################
# Alternative ideology estimates (presumably one column per specification,
# with an identifier in the first column -- confirm against the CSV)
embeddings <- read.csv('ideology_estimates_robust.csv')
# Pairwise correlations between every column except the first
estimate_cor <- cor(embeddings[, -1])
# Print the correlation matrix as a formatted table
stargazer(estimate_cor)

#############################
# Table C1: Emphasis versus Positioning
#############################
# Load in separate scores estimated with additional 2 statements
scores <- read.csv("webscore_estimates_emphasis.csv")

# Pull out additional two statements.
# subset() is used instead of `[` with `==` so that rows whose identifier is
# missing are dropped rather than returned as spurious all-NA rows.
subset(scores, candidate_id == 'Blumenauer, Earl OR 3 2020 Healthcare')
subset(scores, candidate_id == 'Jeff Duncan SC 3 2022 Immigration')

# Compare with original WEB Scores from Master (same NA-safe matching)
subset(master, candidate == 'Blumenauer, Earl' & year == 2020)$web_score
subset(master, candidate == 'Jeff Duncan' & year == 2022)$web_score


#############################
# Figure 2: Histogram
#############################
# Summary statistics: overall mean, Democratic mean, Republican mean,
# and overall standard deviation of WEB Scores (missing scores ignored)
mean(master$web_score, na.rm = TRUE)
with(master, mean(web_score[cand_party == 'Democrat'], na.rm = TRUE))
with(master, mean(web_score[cand_party == 'Republican'], na.rm = TRUE))
sd(master$web_score, na.rm = TRUE)

# Keep only candidates that have a WEB Score
master_figure <- master[!is.na(master$web_score), ]

# Overlaid (not stacked) party histograms of WEB Scores
ggplot(master_figure, aes(x = web_score, fill = cand_party)) +
  geom_histogram(position = 'identity', binwidth = .05,
                 alpha = .6, color = "#e9ecef") +
  scale_fill_manual(name = 'Candidate Party',
                    values = c('gray1', 'gray55')) +
  labs(x = '\nWEB Score', y = 'Number of Candidates\n') +
  theme_bw() +
  theme(legend.position = 'bottom',
        legend.title = element_text(size = 25),
        legend.text = element_text(size = 20),
        axis.title = element_text(size = 25),
        axis.text = element_text(size = 20))

# Write the most recently created plot to disk
ggsave('fg2.tiff', width = 15, height = 10, units = 'in')

#############################
# Table 1: Most Extreme Incumbents
#############################
incumbents <- subset(master, inc == 1)
# Columns shown in the table
table_columns <- c('candidate', 'year', 'web_score')

# Ten incumbents with the highest WEB Scores
incumbents[order(-incumbents$web_score)[1:10], table_columns]

# Ten incumbents with the lowest WEB Scores
incumbents[order(incumbents$web_score)[1:10], table_columns]
rm(incumbents, table_columns)

# Find WEB Scores for Candidates Referenced
# NOTE(review): the 'Alexandia' spelling is kept exactly as written; it is used
# as a lookup key and presumably mirrors the spelling in the data -- confirm.
with(master, web_score[candidate == 'Alexandia Ocasio-Cortez' & year == 2022])
with(master, web_score[candidate == 'Greene, Marjorie Taylor' & year == 2020])

#############################
# Table 2: Measurement Correlations
#############################
### CFScores versus WEB Scores (all for figure caption) ###
# Candidates that have both a CFScore and a WEB Score
has_cf_and_web <- !is.na(master$cfscore) & !is.na(master$web_score)
dime.cor <- master[has_cf_and_web, ]
dime.cor.dem <- subset(dime.cor, cand_party == 'Democrat')
dime.cor.rep <- subset(dime.cor, cand_party == 'Republican')

# All candidates, then Democrats, then Republicans
with(dime.cor, cor(web_score, cfscore))
with(dime.cor.dem, cor(web_score, cfscore))
with(dime.cor.rep, cor(web_score, cfscore))

### CFScores, DW Nominate, WEB Scores Correlations (Table 2) ###
# Further restrict to candidates that also have a first-dimension
# NOMINATE score, so all three measures are observed
nom.cor <- dime.cor[!is.na(dime.cor$nominate_dim1), ]
nom.cor.dem <- subset(nom.cor, cand_party == 'Democrat')
nom.cor.rep <- subset(nom.cor, cand_party == 'Republican')

### CFScores versus WEB Scores (just members) ###
# All candidates, then Democrats, then Republicans
with(nom.cor, cor(web_score, cfscore))
with(nom.cor.dem, cor(web_score, cfscore))
with(nom.cor.rep, cor(web_score, cfscore))

### Nominate versus WEB Scores (just members) ###
# All candidates, then Democrats, then Republicans
with(nom.cor, cor(web_score, nominate_dim1))
with(nom.cor.dem, cor(web_score, nominate_dim1))
with(nom.cor.rep, cor(web_score, nominate_dim1))

### Nominate versus CFScores (just members) ###
# All candidates, then Democrats, then Republicans
with(nom.cor, cor(cfscore, nominate_dim1))
with(nom.cor.dem, cor(cfscore, nominate_dim1))
with(nom.cor.rep, cor(cfscore, nominate_dim1))



#############################
# Table D1: GPT Validation and Hand Labels
#############################
# Paragraph-level data carrying GPT scores (replaces the earlier
# issue_statements object)
issue_statements <- readRDS('paragraphs_withgpt.rds')

# Hand-labeled sample of paragraphs
handlabeled <- read.csv('sample_handlabeled.csv')

# Attach the GPT score of each hand-labeled paragraph (inner join)
gpt_scores <- subset(issue_statements, select = c('paragraph_id', 'gpt_score'))
handlabeled <- merge(handlabeled, gpt_scores, by = 'paragraph_id')

# Attach the candidate's WEB Score; keep paragraphs without a match
web_scores <- subset(master, select = c('candidate_id', 'web_score'))
handlabeled <- merge(handlabeled, web_scores, by = 'candidate_id', all.x = TRUE)

# Party-specific samples
handlabeled_dem <- subset(handlabeled, cand_party == 'Democrat')
handlabeled_rep <- subset(handlabeled, cand_party == 'Republican')

# Correlations of hand labels with GPT scores and with WEB Scores,
# using all pairs where both values are observed
# All paragraphs
with(handlabeled, cor(hand_score, gpt_score, use = 'pairwise.complete.obs'))
with(handlabeled, cor(hand_score, web_score, use = 'pairwise.complete.obs'))

# Democrats
with(handlabeled_dem, cor(hand_score, gpt_score, use = 'pairwise.complete.obs'))
with(handlabeled_dem, cor(hand_score, web_score, use = 'pairwise.complete.obs'))

# Republicans
with(handlabeled_rep, cor(hand_score, gpt_score, use = 'pairwise.complete.obs'))
with(handlabeled_rep, cor(hand_score, web_score, use = 'pairwise.complete.obs'))


#############################
# Table 3: GPT Validation Comparing Scores
#############################
# Build the candidate ID as "candidate state_postal cd year" (space separated)
issue_statements$candidate_id <- with(issue_statements,
                                      paste(candidate, state_postal, cd, year))

# Mean GPT score per candidate ID, ignoring missing paragraph scores
mean_scores <- summarise(group_by(issue_statements, candidate_id),
                         mean_gptscore = mean(gpt_score, na.rm = TRUE))

# Add the mean GPT score to master, keeping every master row
master <- merge(master, mean_scores, by = 'candidate_id', all.x = TRUE)

# Sample with WEB Score, CFScore and mean GPT score all observed
gpt_cf <- subset(master,
                 !is.na(web_score) & !is.na(cfscore) & !is.na(mean_gptscore))
gpt_cf_dem <- subset(gpt_cf, cand_party == 'Democrat')
gpt_cf_rep <- subset(gpt_cf, cand_party == 'Republican')

# Sample with WEB Score, NOMINATE and mean GPT score all observed
gpt_nom <- subset(master,
                  !is.na(web_score) & !is.na(nominate_dim1) & !is.na(mean_gptscore))
gpt_nom_dem <- subset(gpt_nom, cand_party == 'Democrat')
gpt_nom_rep <- subset(gpt_nom, cand_party == 'Republican')

# WEB vs CFScore
### All candidates: mean GPT score vs WEB Score, then vs CFScore
with(gpt_cf, cor(mean_gptscore, web_score))
with(gpt_cf, cor(mean_gptscore, cfscore))

### Democrats: mean GPT score vs WEB Score, then vs CFScore
with(gpt_cf_dem, cor(mean_gptscore, web_score))
with(gpt_cf_dem, cor(mean_gptscore, cfscore))

### Republicans: mean GPT score vs WEB Score, then vs CFScore
with(gpt_cf_rep, cor(mean_gptscore, web_score))
with(gpt_cf_rep, cor(mean_gptscore, cfscore))

# WEB vs NOM
### All candidates: mean GPT score vs WEB Score, then vs NOMINATE
with(gpt_nom, cor(mean_gptscore, web_score))
with(gpt_nom, cor(mean_gptscore, nominate_dim1))

### Democrats: mean GPT score vs WEB Score, then vs NOMINATE
with(gpt_nom_dem, cor(mean_gptscore, web_score))
with(gpt_nom_dem, cor(mean_gptscore, nominate_dim1))

### Republicans: mean GPT score vs WEB Score, then vs NOMINATE
with(gpt_nom_rep, cor(mean_gptscore, web_score))
with(gpt_nom_rep, cor(mean_gptscore, nominate_dim1))



#############################
# Figure E1: Caucus Difference in Means
#############################
# Restrict to incumbents (inc == 1)
incumbents <- subset(master, inc == 1)

# Party-wide mean WEB Scores among incumbents (reference lines in the figure)
dem_mean <- with(incumbents,
                 mean(web_score[cand_party == 'Democrat'], na.rm = TRUE))
rep_mean <- with(incumbents,
                 mean(web_score[cand_party == 'Republican'], na.rm = TRUE))

# One-row summary for a caucus: mean and sd of WEB Scores and the number of
# non-missing scores, labelled with the caucus name and its party.
#   members:     data frame of caucus members (must contain web_score)
#   caucus_name: label stored in the caucus column
#   party_name:  label stored in the party column
summarise_caucus <- function(members, caucus_name, party_name) {
  caucus_row <- summarise(members,
                          mean_web_ideology = mean(web_score, na.rm = TRUE),
                          sd_web_ideology = sd(web_score, na.rm = TRUE),
                          n = sum(!is.na(web_score)))
  caucus_row$caucus <- caucus_name
  caucus_row$party <- party_name
  caucus_row
}

# Stack the six caucus summaries (three Democratic, three Republican)
plot_df <- rbind(
  summarise_caucus(filter(incumbents, progressive == 1),
                   'Progressive Caucus', 'Democrat'),
  summarise_caucus(filter(incumbents, newdem == 1),
                   'New Democratic Coalition', 'Democrat'),
  summarise_caucus(filter(incumbents, bluedog == 1),
                   'Blue Dog Coalition', 'Democrat'),
  summarise_caucus(filter(incumbents, mainstreet == 1),
                   'Main Street Partnership', 'Republican'),
  summarise_caucus(filter(incumbents, studycmte == 1),
                   'Republican Study Committee', 'Republican'),
  summarise_caucus(filter(incumbents, freedom == 1),
                   'Freedom Caucus', 'Republican')
)

# 95% confidence interval: mean +/- 1.96 standard errors
ci_half_width <- 1.96 * plot_df$sd_web_ideology / sqrt(plot_df$n)
plot_df$lb <- plot_df$mean_web_ideology - ci_half_width
plot_df$ub <- plot_df$mean_web_ideology + ci_half_width

# Desired top-to-bottom order of the caucuses on the y-axis
caucus_order <- c("Progressive Caucus", "New Democratic Coalition", "Blue Dog Coalition",
                  "Main Street Partnership", "Republican Study Committee", "Freedom Caucus")

# Positions of the party-mean reference lines
vertical.lines <- c(dem_mean, rep_mean)
# Levels are reversed because the first factor level is drawn at the bottom
plot_df$caucus <- factor(plot_df$caucus, levels = rev(caucus_order))

# Create Figure E1: caucus means with 95% CIs and party-mean reference lines
ggplot(plot_df, aes(x = mean_web_ideology, y = caucus, color = party)) +
  geom_pointrange(aes(xmin = lb, xmax = ub),
                  position = position_dodge(width = 1),
                  size = 1.3) +
  geom_vline(xintercept = vertical.lines, colour = c('gray1', 'gray55'),
             linetype = "longdash", size = 1.1) +
  scale_color_manual(values = c('gray1', 'gray55')) +
  xlim(-1.1, 1.2) +
  labs(x = '\nWEB Score', y = 'Congressional Caucus\n', color = 'Party') +
  guides(fill = guide_legend(keywidth = 4, keyheight = 1),
         linetype = guide_legend(keywidth = 4, keyheight = 1)) +
  theme_bw() +
  theme(legend.position = 'bottom',
        legend.title = element_text(size = 17),
        legend.text = element_text(size = 17),
        axis.title = element_text(size = 20),
        axis.text.x = element_text(size = 17),
        axis.text.y = element_text(size = 17))

ggsave('fgE1.tiff', width = 12, height = 8, units = 'in')

# Print Group Means
plot_df

rm(plot_df, caucus_order, dem_mean, rep_mean, vertical.lines,
   ci_half_width, summarise_caucus)

# Difference of means tests between groups for body of appendix.
# The calls are left exactly as written because t.test() echoes its argument
# expressions in the "data:" line of the printed result.
# Progressive vs New Democratic
t.test(incumbents$web_score[incumbents$progressive == 1 & !is.na(incumbents$web_score)], 
       incumbents$web_score[incumbents$newdem == 1 & !is.na(incumbents$web_score)])

# Blue Dog vs New Democratic
t.test(incumbents$web_score[incumbents$bluedog == 1 & !is.na(incumbents$web_score)], 
       incumbents$web_score[incumbents$newdem == 1 & !is.na(incumbents$web_score)])

# Main Street vs Freedom
t.test(incumbents$web_score[incumbents$mainstreet == 1 & !is.na(incumbents$web_score)], 
       incumbents$web_score[incumbents$freedom == 1 & !is.na(incumbents$web_score)])

# Main Street vs Republican Study Committee
t.test(incumbents$web_score[incumbents$mainstreet == 1 & !is.na(incumbents$web_score)], 
       incumbents$web_score[incumbents$studycmte == 1 & !is.na(incumbents$web_score)])

# Freedom vs Republican Study Committee
t.test(incumbents$web_score[incumbents$freedom == 1 & !is.na(incumbents$web_score)], 
       incumbents$web_score[incumbents$studycmte == 1 & !is.na(incumbents$web_score)])
rm(incumbents)


#############################
# Figure F2: Embeddings Relationships
#############################
# Load full embeddings data.
# Column 301 is dropped before every average below; in candidate_embeddings it
# is read as the candidate identifier later in this section, and in
# word_embeddings it presumably holds the word -- confirm against the CSVs.
candidate_embeddings <- read.csv('candidate_embeddings.csv')
word_embeddings <- read.csv('word_embeddings.csv')

# Healthcare Example
# Mean embedding of the rows for 'healthcare' and for 'universal'
healthcare <- colMeans(
  word_embeddings[which(word_embeddings$word == 'healthcare'), -301])
universal <- colMeans(
  word_embeddings[which(word_embeddings$word == 'universal'), -301])

# Candidate embeddings for the two example candidates
roy_embedding <- colMeans(
  candidate_embeddings[which(candidate_embeddings$candidate_id ==
                               'Chip Roy TX 21 2022'), -301])
aoc_embedding <- colMeans(
  candidate_embeddings[which(candidate_embeddings$candidate_id ==
                               'Alexandia Ocasio-Cortez NY 14 2022'), -301])

# Conservative Healthcare: healthcare + roy embedding
healthcare_conservative <- healthcare + roy_embedding

# Liberal Healthcare: healthcare + aoc embedding
healthcare_liberal <- healthcare + aoc_embedding

# Compare Cosine Similarities of each shifted vector with 'universal'
cosine(healthcare_conservative, universal)
cosine(healthcare_liberal, universal)

# Candidate embedding values only (column 301 removed)
candidate_embeddings_subset <- candidate_embeddings[-301]

# Keyword lists for each policy position
# Conservative
government_conservative <- c('spending', 'regulations', 'prudent', 'fiscal')
immigration_conservative <- c('incursions', 'enforces', 'prosecutes', 'secures')
abortion_conservative <- c('prolife', 'families', 'heartbeat', 'prohibit')
education_conservative <- c('parents', 'choice', 'homeschooling', 'transparency')

# Liberal
# NOTE: healthcare_liberal here replaces the numeric vector of the same name
# computed in the healthcare example above.
environment_liberal <- c('renewable', 'climate', 'fossil', 'color')
healthcare_liberal <- c('universal','singlepayer', 'expand', 'medicareforall')
guns_liberal <- c('background', 'ban', 'assault', 'safety')
wage_liberal <- c('living', 'minimum', 'affordable', 'cost')

# Column-wise mean embedding over all word_embeddings rows whose word is in
# `targets` (a character vector of one or more words); column 301 is excluded.
mean_word_embedding <- function(targets) {
  colMeans(word_embeddings[word_embeddings$word %in% targets, -301])
}

# For each policy area: mean embedding of the keyword list, and the embedding
# of the target word itself
gov_con_embeddings <- mean_word_embedding(government_conservative)
government <- mean_word_embedding('government')

im_con_embeddings <- mean_word_embedding(immigration_conservative)
immigration <- mean_word_embedding('immigration')

ab_con_embeddings <- mean_word_embedding(abortion_conservative)
abortion <- mean_word_embedding('abortion')

edu_con_embeddings <- mean_word_embedding(education_conservative)
education <- mean_word_embedding('education')

env_lib_embeddings <- mean_word_embedding(environment_liberal)
environment <- mean_word_embedding('environment')

hth_lib_embeddings <- mean_word_embedding(healthcare_liberal)
healthcare <- mean_word_embedding('healthcare')

gun_lib_embeddings <- mean_word_embedding(guns_liberal)
guns <- mean_word_embedding('guns')

wag_lib_embeddings <- mean_word_embedding(wage_liberal)
wage <- mean_word_embedding('wage')

# Cosine similarity, per candidate, between (target word + candidate embedding)
# and the mean keyword embedding of each policy area.
# Each entry pairs a target-word embedding with its keyword-set embedding; the
# entry names become the similarity column names of df.
policy_pairs <- list(
  gov_con = list(target = government,  keywords = gov_con_embeddings),
  im_con  = list(target = immigration, keywords = im_con_embeddings),
  ab_con  = list(target = abortion,    keywords = ab_con_embeddings),
  edu_con = list(target = education,   keywords = edu_con_embeddings),
  env_lib = list(target = environment, keywords = env_lib_embeddings),
  hth_lib = list(target = healthcare,  keywords = hth_lib_embeddings),
  gun_lib = list(target = guns,        keywords = gun_lib_embeddings),
  wag_lib = list(target = wage,        keywords = wag_lib_embeddings)
)

# Candidate identifiers (assumed to be the column dropped from
# candidate_embeddings_subset, i.e. column 301 -- confirm) and the numeric
# embedding values as a matrix
cand_ids <- as.character(candidate_embeddings$candidate_id)
cand_matrix <- as.matrix(candidate_embeddings_subset)

# Preallocate a numeric result matrix: one row per candidate row, one column
# per policy area. Keeping the values numeric avoids the precision loss of
# round-tripping them through character, and avoids growing a data frame
# row by row.
similarities <- matrix(NA_real_,
                       nrow = length(cand_ids),
                       ncol = length(policy_pairs),
                       dimnames = list(NULL, names(policy_pairs)))

# seq_along() so that an empty embeddings file yields zero iterations
for (i in seq_along(cand_ids)) {
  # Average over all rows sharing this candidate ID; which() drops NA
  # comparisons so a missing ID elsewhere cannot turn this mean into NA
  same_candidate <- which(cand_ids == cand_ids[i])
  c_emb <- colMeans(cand_matrix[same_candidate, , drop = FALSE])
  # Add the candidate embedding to each target word and compare the result
  # with the corresponding keyword embedding
  similarities[i, ] <- vapply(policy_pairs, function(pair) {
    as.numeric(cosine(c_emb + pair$target, pair$keywords))
  }, numeric(1))
}

# One row per candidate row: ID plus the eight numeric similarity columns
df <- data.frame(candidate_id = cand_ids, similarities,
                 stringsAsFactors = FALSE)

# Merge with master, keeping every master row
master <- merge(master, df, by = 'candidate_id', all.x = TRUE)

# Regress each policy-area similarity on WEB Score (one bivariate OLS per
# outcome) and plot the WEB Score slopes with 95% confidence intervals.

# Outcome column in master -> label shown on the plot's x-axis
outcome_labels <- c(
  gov_con = "Government \n(Conservative)",
  im_con  = "Immigration \n(Conservative)",
  ab_con  = "Abortion \n(Conservative)",
  edu_con = "Education \n(Conservative)",
  env_lib = "Environment \n(Liberal)",
  hth_lib = "Healthcare \n(Liberal)",
  gun_lib = "Guns \n(Liberal)",
  wag_lib = "Wages \n(Liberal)"
)

# Fit `<outcome> ~ web_score` for every outcome. The similarity columns are
# already numeric, so no as.numeric() coercion is needed in the formula.
web_models <- lapply(names(outcome_labels), function(outcome) {
  lm(reformulate('web_score', response = outcome), data = master)
})
names(web_models) <- names(outcome_labels)

# Slope on web_score and its standard error, looked up by coefficient name
# rather than by position; a dropped coefficient yields NA instead of an error
web_slope <- function(model) {
  unname(coef(model)['web_score'])
}
web_slope_se <- function(model) {
  unname(coef(summary(model))[, 'Std. Error']['web_score'])
}

# Merge into single DF: one row per model
plot_df <- data.frame(mod = unname(outcome_labels),
                      coefficients = vapply(web_models, web_slope, numeric(1)),
                      se = vapply(web_models, web_slope_se, numeric(1)),
                      row.names = NULL,
                      stringsAsFactors = FALSE)

# Left-to-right order of the models on the x-axis: conservative positions
# first, then liberal positions. Named model_order so it does not shadow
# base::order.
model_order <- c("Abortion \n(Conservative)", "Education \n(Conservative)",
                 "Government \n(Conservative)", "Immigration \n(Conservative)",
                 "Guns \n(Liberal)", "Environment \n(Liberal)",
                 "Healthcare \n(Liberal)", "Wages \n(Liberal)")

# Re level factor variable
plot_df$mod <- factor(plot_df$mod, levels = model_order)

# Create the plot: point estimates with +/- 1.96 SE error bars around zero line
ggplot(plot_df, aes(x = mod, y = coefficients)) +
  geom_hline(yintercept = 0, color = "black", linetype = "solid") +
  geom_point(size = 4) +
  geom_errorbar(aes(ymin = coefficients - 1.96 * se,
                    ymax = coefficients + 1.96 * se),
                width = 0.2, color = "black") +
  labs(x = "\nModel", y = "Effect of WEB Score\nOn Cosine Similarity") +
  theme_bw() +
  theme(axis.title = element_text(size = 20),
        axis.text = element_text(size = 15))

ggsave('fgf2.tiff', width = 14, height = 7, units = 'in')

